1 Data

Preparations in file 00.Rmd

dog_ownership_cost <- read_rds("data/dog_ownership_cost.Rds") %>% 
  select(-cost_compared_to_other_breeds)

length(unique(dog_ownership_cost$SSC_NAME16))
[1] 187
length(unique(dog_ownership_cost$dog_breed))
[1] 182
SSC <- read_rds("data/geo/SSC.Rds")

length(unique(SSC$SSC_NAME16))
[1] 184
wide_cost_n <- read_rds("data/wide_cost_n.Rds")
wide_cost_p <- read_rds("data/wide_cost_p.Rds")

2 All Brisbane dogs combined

2.1 Dogs per capita

Summarizing all dogs, and expensive only.

dog_ownership_agg <- dog_ownership_cost %>% 
  group_by(SSC_NAME16) %>% 
  summarise(dogs_total = n(),
            dogs_exp = sum(expensive))

SSC %<>% 
  left_join(dog_ownership_agg)

2.1.1 Missing SEIFA

Areas with no URP/SEIFA but having (small amount of) dogs:

          SSC_NAME16 dogs_total
1         Eagle Farm          1
2 Enoggera Reservoir          3
3          Karawatha          6
4             Lytton          1

Since these areas have no SEIFA they will be excluded.

2.1.2 Missing dogs

Area with low URP and no dogs at all:

     SSC_NAME16 dogs_total URP
1 England Creek         NA  33

NA was assumed to mean 0 here.

2.1.3 Ranking

2.1.4 Map

2.2 Proportion of expensive dogs

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
 0.0000  0.1178  0.1366  0.1403  0.1650  0.2636       1 

2.2.1 Ranking

2.2.2 Map

3 Association with SEIFA

3.1 Functions

seifa_means <- function (seifa_index) {
  
  myenc <- enquo(seifa_index)
  
  SSC %>% 
    st_drop_geometry() %>% 
    group_by(!!myenc) %>% 
    summarize(mean = mean(dogs_exp_prop, na.rm = TRUE),
              sd = sd(dogs_exp_prop, na.rm = TRUE),
              p25 = quantile(dogs_exp_prop, c(0.25), na.rm = TRUE),
              p50 = quantile(dogs_exp_prop, c(0.50), na.rm = TRUE),
              p75 = quantile(dogs_exp_prop, c(0.75), na.rm = TRUE)) %>% 
    ungroup()
}

seifa_cor <- function (seifa_index) {
  
  myenc <- enquo(seifa_index)
  
  SSC %>%
    st_drop_geometry() %>%
    select(!!myenc, dogs_exp_prop) %>%
    mutate_if(is.factor, as.numeric) %>%
    correlation(method = "kendall")
  
}

seifa_plot <- function (seifa_index) {
  
  model <- eval(substitute(lm(dogs_exp_prop ~ seifa_index, 
                              data = SSC, na.action = na.omit)))
  means <- estimate_means(model)
  
  myenc <- enquo(seifa_index)
  
  ggplot(SSC,
         aes(x = !!myenc,
             y = dogs_exp_prop,
             fill = !!myenc)) +
    geom_violin(alpha = 0.66) +
    geom_jitter2(width = 0.05, alpha = 0.5) +
    geom_line(data = means, aes(y = Mean, group = 1), size = 1) +
    geom_pointrange(data = means,
                    aes(y = Mean, ymin = CI_low, ymax = CI_high),
                    size = 1,
                    color = "white") + 
    scale_fill_brewer(palette = "BrBG") +
    ylab("Proportion of expensive dogs") +
    theme_modern()
  
}

3.2 IRSD

3.2.1 Recalculated

seifa_means(IRSD_d)
# A tibble: 10 x 6
   IRSD_d  mean     sd    p25   p50   p75
   <fct>  <dbl>  <dbl>  <dbl> <dbl> <dbl>
 1 1      0.161 0.0443 0.146  0.159 0.184
 2 2      0.130 0.0833 0.0913 0.155 0.170
 3 3      0.147 0.0258 0.129  0.140 0.160
 4 4      0.147 0.0411 0.121  0.144 0.173
 5 5      0.145 0.0305 0.130  0.137 0.155
 6 6      0.142 0.0350 0.115  0.136 0.156
 7 7      0.127 0.0213 0.113  0.128 0.136
 8 8      0.148 0.0268 0.125  0.148 0.163
 9 9      0.126 0.0351 0.112  0.122 0.141
10 10     0.127 0.0336 0.104  0.114 0.152
seifa_cor(IRSD_d)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |         p
-----------------------------------------------------------------------
IRSD_d     | dogs_exp_prop | -0.19 | [-0.28, -0.10] | -3.69 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSD_d)

3.2.2 Original

seifa_means(IRSD_d_orig)
# A tibble: 10 x 6
   IRSD_d_orig  mean     sd    p25   p50   p75
   <fct>       <dbl>  <dbl>  <dbl> <dbl> <dbl>
 1 1           0.185 0.0179 0.179  0.193 0.195
 2 2           0.164 0.0605 0.138  0.161 0.179
 3 3           0.142 0.0540 0.125  0.159 0.180
 4 4           0.161 0.0163 0.151  0.158 0.159
 5 5           0.150 0.0654 0.122  0.160 0.178
 6 6           0.122 0.0813 0.0962 0.136 0.166
 7 7           0.143 0.0346 0.124  0.142 0.166
 8 8           0.150 0.0312 0.130  0.140 0.157
 9 9           0.136 0.0321 0.112  0.131 0.149
10 10          0.134 0.0316 0.113  0.127 0.156
seifa_cor(IRSD_d_orig)
# Correlation Matrix (kendall-method)

Parameter1  |    Parameter2 |   tau |         95% CI |     z |         p
------------------------------------------------------------------------
IRSD_d_orig | dogs_exp_prop | -0.18 | [-0.27, -0.08] | -3.31 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSD_d_orig)

3.3 IRSAD

3.3.1 Recalculated

seifa_means(IRSAD_d)
# A tibble: 10 x 6
   IRSAD_d  mean     sd   p25   p50   p75
   <fct>   <dbl>  <dbl> <dbl> <dbl> <dbl>
 1 1       0.156 0.0641 0.138 0.162 0.186
 2 2       0.140 0.0689 0.130 0.156 0.173
 3 3       0.143 0.0232 0.127 0.144 0.157
 4 4       0.157 0.0408 0.135 0.158 0.185
 5 5       0.139 0.0311 0.128 0.135 0.146
 6 6       0.137 0.0262 0.115 0.129 0.154
 7 7       0.136 0.0354 0.115 0.129 0.146
 8 8       0.141 0.0260 0.123 0.132 0.155
 9 9       0.132 0.0370 0.114 0.124 0.154
10 10      0.121 0.0315 0.101 0.112 0.127
seifa_cor(IRSAD_d)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |         p
-----------------------------------------------------------------------
IRSAD_d    | dogs_exp_prop | -0.23 | [-0.32, -0.13] | -4.35 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSAD_d)

3.3.2 Original

seifa_means(IRSAD_d_orig)
# A tibble: 10 x 6
   IRSAD_d_orig  mean     sd    p25   p50   p75
   <fct>        <dbl>  <dbl>  <dbl> <dbl> <dbl>
 1 1            0.185 0.0179 0.179  0.193 0.195
 2 2            0.145 0.0955 0.0751 0.131 0.201
 3 3            0.161 0.0248 0.143  0.161 0.179
 4 4            0.138 0.0183 0.131  0.138 0.144
 5 5            0.153 0.0777 0.151  0.158 0.176
 6 6            0.126 0.0875 0.0641 0.159 0.177
 7 7            0.157 0.0258 0.136  0.156 0.173
 8 8            0.147 0.0297 0.127  0.144 0.155
 9 9            0.144 0.0352 0.129  0.143 0.162
10 10           0.133 0.0319 0.112  0.127 0.149
seifa_cor(IRSAD_d_orig)
# Correlation Matrix (kendall-method)

Parameter1   |    Parameter2 |   tau |         95% CI |     z |         p
-------------------------------------------------------------------------
IRSAD_d_orig | dogs_exp_prop | -0.22 | [-0.31, -0.13] | -3.98 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSAD_d_orig)

3.4 IER

3.4.1 Recalculated

seifa_means(IER_d)
# A tibble: 10 x 6
   IER_d  mean     sd   p25   p50   p75
   <fct> <dbl>  <dbl> <dbl> <dbl> <dbl>
 1 1     0.162 0.0452 0.136 0.165 0.187
 2 2     0.137 0.0331 0.126 0.134 0.154
 3 3     0.138 0.0210 0.118 0.137 0.152
 4 4     0.144 0.0410 0.115 0.129 0.152
 5 5     0.125 0.0702 0.110 0.138 0.165
 6 6     0.152 0.0288 0.129 0.156 0.172
 7 7     0.132 0.0424 0.113 0.139 0.150
 8 8     0.132 0.0256 0.115 0.129 0.153
 9 9     0.137 0.0425 0.114 0.122 0.146
10 10    0.144 0.0439 0.114 0.153 0.171
seifa_cor(IER_d)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |        95% CI |     z |     p
------------------------------------------------------------------
IER_d      | dogs_exp_prop | -0.08 | [-0.18, 0.02] | -1.56 | 0.118

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IER_d)

3.4.2 Original

seifa_means(IER_d_orig)
# A tibble: 10 x 6
   IER_d_orig  mean     sd   p25   p50   p75
   <fct>      <dbl>  <dbl> <dbl> <dbl> <dbl>
 1 1          0.161 0.0440 0.140 0.162 0.183
 2 2          0.137 0.0316 0.124 0.132 0.154
 3 3          0.145 0.0387 0.117 0.137 0.163
 4 4          0.134 0.0263 0.114 0.128 0.144
 5 5          0.130 0.0693 0.114 0.143 0.173
 6 6          0.145 0.0297 0.127 0.138 0.161
 7 7          0.144 0.0340 0.128 0.141 0.154
 8 8          0.129 0.0465 0.127 0.130 0.152
 9 9          0.139 0.0369 0.115 0.132 0.164
10 10         0.138 0.0412 0.111 0.127 0.164
seifa_cor(IER_d_orig)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |        95% CI |     z |     p
------------------------------------------------------------------
IER_d_orig | dogs_exp_prop | -0.08 | [-0.18, 0.01] | -1.57 | 0.116

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IER_d_orig)

3.5 IEO

3.5.1 Recalculated

seifa_means(IEO_d)
# A tibble: 10 x 6
   IEO_d  mean     sd   p25   p50   p75
   <fct> <dbl>  <dbl> <dbl> <dbl> <dbl>
 1 1     0.156 0.0646 0.133 0.165 0.188
 2 2     0.166 0.0265 0.152 0.159 0.178
 3 3     0.132 0.0683 0.123 0.144 0.162
 4 4     0.146 0.0292 0.123 0.145 0.165
 5 5     0.146 0.0385 0.130 0.147 0.162
 6 6     0.153 0.0306 0.131 0.152 0.171
 7 7     0.133 0.0276 0.117 0.129 0.136
 8 8     0.124 0.0304 0.115 0.125 0.132
 9 9     0.127 0.0209 0.113 0.127 0.132
10 10    0.119 0.0302 0.106 0.112 0.119
seifa_cor(IEO_d)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |         p
-----------------------------------------------------------------------
IEO_d      | dogs_exp_prop | -0.30 | [-0.39, -0.21] | -5.84 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IEO_d)

3.5.2 Original

seifa_means(IEO_d_orig)
# A tibble: 10 x 6
   IEO_d_orig   mean     sd    p25    p50    p75
   <fct>       <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1 1          0.153  0.0665 0.138  0.179  0.194 
 2 2          0.175  0.0911 0.131  0.180  0.222 
 3 3          0.0625 0.0884 0.0312 0.0625 0.0938
 4 4          0.172  0.0393 0.145  0.165  0.188 
 5 5          0.168  0.0221 0.151  0.171  0.175 
 6 6          0.166  0.0260 0.161  0.176  0.182 
 7 7          0.136  0.0733 0.127  0.154  0.159 
 8 8          0.150  0.0320 0.132  0.148  0.162 
 9 9          0.147  0.0331 0.130  0.147  0.165 
10 10         0.130  0.0300 0.112  0.127  0.144 
seifa_cor(IEO_d_orig)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |         p
-----------------------------------------------------------------------
IEO_d_orig | dogs_exp_prop | -0.27 | [-0.35, -0.17] | -4.82 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IEO_d_orig)

4 PCA

data <- 
  # wide_cost_n %>%
  wide_cost_p %>%
  st_drop_geometry() %>% 
  select(akita:last_col()) %>% 
  as_tibble()

# View(cov(data))
pca <- principal_components(data, 
                            # standardize = FALSE,
                            n = "auto")
pca
# Loadings from Principal Component Analysis (no rotation)

Variable              |    PC1    | Complexity
----------------------------------------------
akita                 |   -0.10   |    1.00   
british_bulldog       |   0.21    |    1.00   
dogue_de_bordeaux     |   0.14    |    1.00   
french_bulldog        |   0.54    |    1.00   
german_shepherd       |   -0.78   |    1.00   
irish_wolfhound       |   -0.46   |    1.00   
maltese               |   0.80    |    1.00   
rottweiler            |   -0.58   |    1.00   
samoyed               |   0.17    |    1.00   
yorkshire_terrier     |   0.35    |    1.00   
chinese_crested_dog   | -1.92e-03 |    1.00   
chow_chow             |   0.20    |    1.00   
lowchen               |   0.20    |    1.00   
saluki                |   0.07    |    1.00   
pharaoh_hound         |   -0.22   |    1.00   
st_bernard            |   -0.32   |    1.00   
tibetan_mastiff       |   -0.04   |    1.00   
canadian_eskimo_dog   |   0.08    |    1.00   
black_russian_terrier |   0.10    |    1.00   

The unique principal component accounted for 13.57% of the total variance of the original data.
summary(pca)
# (Explained) Variance of Components

Parameter                       |   PC1
---------------------------------------
Eigenvalues                     | 2.578
Variance Explained              | 0.136
Variance Explained (Cumulative) | 0.136
Variance Explained (Proportion) | 0.136
plot(pca)

pca_results <- wide_cost_p %>%
  st_drop_geometry() %>% 
  as_tibble() %>% 
  select(SSC_CODE16:caution) %>% 
  mutate(pca_raw = predict(pca)$Component_1, 
         pca = ntile(pca_raw, 10)) 

4.1 IRSD_d

ggplot(pca_results, aes(x = IRSD, y = pca_raw)) + 
  geom_point()

pca == IRSD_d <lgl> 
# total N=180 valid N=179 mean=0.13 sd=0.34

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 155 | 86.11 |   86.59 |  86.59
TRUE  |  24 | 13.33 |   13.41 | 100.00
<NA>  |   1 |  0.56 |    <NA> |   <NA>

4.2 IRSAD_d

ggplot(pca_results, aes(x = IRSAD, y = pca_raw)) + 
  geom_point()

pca == IRSAD_d <lgl> 
# total N=180 valid N=179 mean=0.14 sd=0.35

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 154 | 85.56 |   86.03 |  86.03
TRUE  |  25 | 13.89 |   13.97 | 100.00
<NA>  |   1 |  0.56 |    <NA> |   <NA>

4.3 IER_d

ggplot(pca_results, aes(x = IER, y = pca_raw)) + 
  geom_point()

pca == IER_d <lgl> 
# total N=180 valid N=179 mean=0.11 sd=0.32

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 159 | 88.33 |   88.83 |  88.83
TRUE  |  20 | 11.11 |   11.17 | 100.00
<NA>  |   1 |  0.56 |    <NA> |   <NA>

4.4 IEO_d

ggplot(pca_results, aes(x = IEO, y = pca_raw)) + 
  geom_point()

pca == IEO_d <lgl> 
# total N=180 valid N=179 mean=0.18 sd=0.38

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 147 | 81.67 |   82.12 |  82.12
TRUE  |  32 | 17.78 |   17.88 | 100.00
<NA>  |   1 |  0.56 |    <NA> |   <NA>

5 Clustering

data <- 
  wide_cost_p %>%
  st_drop_geometry() %>% 
  select(akita:last_col()) %>% 
  as_tibble()

5.1 No of clusters

n <- n_clusters(data, package = c("easystats", "NbClust", "mclust"))
n
# Method Agreement Procedure:

The choice of 2 clusters is supported by 7 (33.33%) methods out of 21 (Elbow, Silhouette, Duda, Pseudot2, Beale, Mcclain, Dunn).
plot(n)

5.2 K-Means

rez_kmeans <- cluster_analysis(data, n = 2, method = "kmeans")

rez_kmeans
# Clustering Solution

The 2 clusters accounted for 5.70% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |    44 |     1127.34 | -0.36 |            0.88 |              0.07 |           1.01 |           -0.69 |           -0.25 |    0.12 |      -0.32 |    0.31 |              0.47 |                0.21 |      0.20 |    0.53 |  -0.14 |          0.07 |      -0.14 |            0.11 |                0.13 |                 -0.07
2       |   136 |     2079.91 |  0.12 |           -0.28 |             -0.02 |          -0.33 |            0.22 |            0.08 |   -0.04 |       0.10 |   -0.10 |             -0.15 |               -0.07 |     -0.07 |   -0.17 |   0.04 |         -0.02 |       0.05 |           -0.03 |               -0.04 |                  0.02

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3401.000          |             193.745 |           3207.255 | 0.057

# You can access the predicted clusters via 'predict()'.
plot(rez_kmeans)

plot(summary(rez_kmeans))

cluster_results <- wide_cost_p %>%
  st_drop_geometry() %>% 
  as_tibble() %>% 
  mutate(cluster = predict(rez_kmeans)) 

aggregate(data = cluster_results, german_shepherd ~ cluster, mean)
  cluster german_shepherd
1       1       0.1226264
2       2       0.2429998
aggregate(data = cluster_results, maltese ~ cluster, mean) 
  cluster   maltese
1       1 0.5415989
2       2 0.5198849

5.3 Hierarchical Clustering

rez_hclust <- cluster_analysis(data, n = 2, method = "hclust")

rez_hclust
# Clustering Solution

The 2 clusters accounted for 4.56% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares |    akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound |  maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow |  lowchen |   saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |   178 |     3144.63 | 4.91e-03 |           -0.02 |          6.51e-03 |       5.81e-03 |            0.01 |        6.38e-03 | 6.68e-03 |      -0.01 |   -0.02 |             -0.04 |               -0.07 |  5.26e-03 | 4.87e-03 | 2.80e-03 |         -0.05 |   3.53e-03 |        2.74e-03 |            1.30e-03 |              1.97e-03
2       |     2 |      101.45 |    -0.44 |            2.02 |             -0.58 |          -0.52 |           -1.16 |           -0.57 |    -0.59 |       0.90 |    1.60 |              3.52 |                5.82 |     -0.47 |    -0.43 |    -0.25 |          4.41 |      -0.31 |           -0.24 |               -0.12 |                 -0.18

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3401.000          |             154.924 |           3246.076 | 0.046

# You can access the predicted clusters via 'predict()'.
plot(rez_hclust)

5.4 Hierarchical K-Means

rez_hkmeans <- cluster_analysis(data, n = 2, method = "hkmeans")

rez_hkmeans
# Clustering Solution

The 2 clusters accounted for 5.82% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares |    akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler |  samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow |  lowchen |   saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |   178 |     3133.72 | 4.91e-03 |           -0.01 |          6.51e-03 |       4.65e-03 |       -8.26e-03 |        2.37e-03 |    0.02 |      -0.01 | 6.15e-03 |          6.33e-03 |               -0.04 |  5.26e-03 | 4.87e-03 | 2.80e-03 |         -0.10 |      -0.01 |           -0.03 |            1.30e-03 |              1.97e-03
2       |     2 |       69.36 |    -0.44 |            1.15 |             -0.58 |          -0.41 |            0.74 |           -0.21 |   -1.78 |       1.14 |    -0.55 |             -0.56 |                3.17 |     -0.47 |    -0.43 |    -0.25 |          8.56 |       0.98 |            2.33 |               -0.12 |                 -0.18

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3401.000          |             197.920 |           3203.080 | 0.058

# You can access the predicted clusters via 'predict()'.
plot(rez_hkmeans)

5.5 K-Medoids (PAM)

rez_pam <- cluster_analysis(data, n = 2, method = "pam")

rez_pam
# Clustering Solution

The 2 clusters accounted for 4.07% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |    98 |     1581.65 | -0.01 |           -0.51 |             -0.11 |          -0.25 |            0.26 |           -0.09 |    0.04 |       0.05 |    0.03 |         -7.38e-03 |               -0.14 |     -0.25 |   -0.36 |   0.08 |         -0.03 |       0.10 |            0.02 |               -0.01 |                 -0.06
2       |    82 |     1680.86 |  0.01 |            0.61 |              0.13 |           0.30 |           -0.30 |            0.11 |   -0.04 |      -0.05 |   -0.03 |          8.82e-03 |                0.17 |      0.29 |    0.43 |  -0.09 |          0.03 |      -0.12 |           -0.03 |                0.02 |                  0.08

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3401.000          |             138.493 |           3262.507 | 0.041

# You can access the predicted clusters via 'predict()'.
plot(rez_pam)

5.6 Bootstrapped Hierarchical Clustering

rez_hclust2 <- cluster_analysis(data, 
                                n = NULL, 
                                method = "hclust", 
                                iterations = 500,
                                ci = 0.90)

rez_hclust2
# Clustering Solution

The 9 clusters accounted for 7.68% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler |   samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0       |   160 |     3105.01 | -0.07 |            0.05 |              0.04 |          -0.02 |            0.04 |            0.06 |   -0.09 |       0.05 | -1.50e-04 |              0.03 |                0.02 |      0.03 |    0.05 |   0.03 |          0.02 |       0.03 |            0.02 |                0.01 |                 -0.06
1       |     2 |        0.00 | -0.44 |           -1.20 |             -0.58 |          -0.98 |            2.19 |           -0.57 |   -0.18 |      -1.25 |     -0.55 |             -0.56 |               -0.42 |     -0.47 |   -0.43 |  -0.25 |         -0.18 |      -0.31 |           -0.24 |               -0.12 |                 -0.18
2       |     2 |        0.21 | -0.44 |            0.53 |             -0.58 |          -0.12 |           -0.42 |           -0.57 |    0.87 |      -0.07 |     -0.42 |             -0.04 |               -0.42 |     -0.47 |   -0.43 |  -0.25 |         -0.18 |      -0.31 |           -0.24 |               -0.12 |                 -0.18
3       |     2 |        0.32 | -0.28 |           -0.33 |              0.27 |          -0.46 |           -0.05 |           -0.28 |    0.42 |       0.19 |     -0.45 |             -0.19 |                0.18 |      0.24 |    0.15 |  -0.25 |         -0.18 |      -0.31 |           -0.24 |               -0.12 |                 -0.18
4       |     2 |        0.77 |  0.56 |           -0.21 |             -0.39 |          -0.83 |           -0.47 |           -0.38 |    1.10 |      -0.21 |     -0.11 |              0.05 |               -0.09 |      0.27 |   -0.43 |  -0.25 |         -0.18 |      -0.31 |            0.96 |               -0.12 |                 -0.18
5       |     2 |        1.12 |  0.98 |            0.83 |              1.02 |           0.02 |           -0.47 |            0.01 |   -0.05 |       0.47 |     -0.11 |             -0.44 |                1.48 |     -0.47 |   -0.43 |  -0.25 |         -0.18 |      -0.31 |           -0.24 |               -0.12 |                 -0.18
6       |     2 |        1.58 |  3.59 |           -0.49 |             -0.58 |          -0.22 |            0.36 |           -0.57 |   -0.23 |      -0.08 |      2.46 |             -0.56 |               -0.42 |     -0.47 |   -0.43 |  -0.25 |         -0.18 |      -0.31 |           -0.24 |               -0.12 |                 -0.18
7       |     2 |        4.47 |  2.59 |           -0.74 |             -0.58 |          -0.73 |           -0.28 |           -0.41 |    0.87 |      -0.06 |     -0.19 |             -0.20 |               -0.42 |      0.22 |   -0.43 |  -0.25 |         -0.18 |      -0.31 |           -0.24 |               -0.12 |                  6.45
8       |     6 |       26.31 | -0.44 |           -0.71 |             -0.58 |           1.61 |           -1.27 |           -0.57 |    1.56 |      -0.89 |     -0.21 |             -0.02 |               -0.42 |     -0.47 |   -0.43 |  -0.25 |         -0.18 |      -0.14 |           -0.24 |               -0.12 |                 -0.18

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3401.000          |             261.207 |             34.778 | 0.077

# You can access the predicted clusters via 'predict()'.
plot(rez_hclust2)

5.7 DBSCAN

eps <- n_clusters_dbscan(data, min_size = 0.01) 

eps
The DBSCAN method, based on the total clusters sum of squares, suggests that the optimal eps = 10.1333352087498 (with min. cluster size set to 2), which corresponds to 0 clusters.
plot(eps)

rez_dbscan <- cluster_analysis(data, method = "dbscan", dbscan_eps = 5)

rez_dbscan
# Clustering Solution

The 2 clusters accounted for 2.82% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow |   lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0       |    38 |     2138.96 |  0.35 |            0.12 |              0.09 |           0.21 |            0.32 |            0.12 |   -0.72 |       0.05 |    0.26 |              0.41 |                0.27 |      0.02 | -6.68e-03 |   0.35 |          0.28 |       0.42 |            0.33 |                0.37 |                  0.49
1       |   142 |     1166.13 | -0.09 |           -0.03 |             -0.02 |          -0.06 |           -0.09 |           -0.03 |    0.19 |      -0.01 |   -0.07 |             -0.11 |               -0.07 | -4.45e-03 |  1.79e-03 |  -0.09 |         -0.07 |      -0.11 |           -0.09 |               -0.10 |                 -0.13

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3401.000          |              95.918 |           1166.126 | 0.028

# You can access the predicted clusters via 'predict()'.
plot(rez_dbscan)

5.8 Hierarchical K-Means

rez_hdbscan <- cluster_analysis(data, method = "hdbscan")

rez_hdbscan
# Clustering Solution

The unique cluster accounted for 0.00% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares |    akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound |   maltese | rottweiler |  samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow |   lowchen |    saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0       |   180 |        3401 | 3.67e-17 |        7.76e-17 |         -6.48e-17 |      -3.38e-17 |       -5.59e-17 |       -4.32e-17 | -6.44e-17 |  -4.31e-17 | 2.14e-17 |          3.45e-17 |           -3.28e-17 | -9.67e-18 | -2.51e-17 | -1.17e-17 |     -7.91e-18 |  -8.96e-18 |        1.19e-18 |           -7.56e-18 |             -4.30e-18

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3401.000          |               0.000 |              0.000 | 0.000

# You can access the predicted clusters via 'predict()'.
# plot(rez_hdbscan)

5.9 K-Medoids with estimation of number of clusters (pamk)

rez_pamk <- cluster_analysis(data, method = "pamk")

rez_pamk 
# Clustering Solution

The 10 clusters accounted for 38.96% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound |   maltese | rottweiler |   samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |    63 |      494.10 |  0.07 |           -0.43 |             -0.07 |          -0.31 |            0.08 |           -0.12 |      0.30 |       0.01 |     -0.12 |             -0.12 |               -0.09 |     -0.38 |   -0.33 |  -0.17 |         -0.07 |      -0.02 |           -0.01 |               -0.12 |                 -0.18
10      |     1 |        0.00 | -0.44 |            3.51 |             -0.58 |          -0.98 |           -1.09 |           -0.57 |     -1.20 |       3.05 |     -0.55 |             -0.56 |                6.76 |     -0.47 |   -0.43 |  -0.25 |          9.00 |      -0.31 |           -0.24 |               -0.12 |                 -0.18
2       |    10 |      108.05 | -0.32 |            0.34 |             -0.31 |           2.83 |           -1.07 |           -0.50 |      0.43 |      -0.85 |     -0.31 |              0.50 |                0.06 |     -0.47 |    0.15 |  -0.12 |         -0.18 |      -0.21 |           -0.24 |               -0.12 |                  0.66
3       |    46 |      496.40 |  0.08 |            0.50 |              0.36 |       8.09e-03 |           -0.22 |            0.13 | -7.48e-03 |       0.09 |     -0.23 |             -0.12 |                0.16 |     -0.21 |    0.80 |  -0.07 |         -0.03 |      -0.15 |           -0.14 |               -0.12 |                 -0.12
4       |    12 |      358.09 | -0.06 |           -0.46 |             -0.42 |          -0.76 |            2.40 |            1.38 |     -2.65 |       1.26 |     -0.44 |             -0.56 |               -0.25 |     -0.30 |   -0.43 |  -0.25 |         -0.18 |       1.10 |           -0.24 |               -0.12 |                 -0.18
5       |    35 |      470.89 | -0.15 |            0.24 |             -0.11 |           0.07 |           -0.35 |           -0.23 |      0.23 |      -0.28 |      0.86 |              0.46 |               -0.11 |      1.22 |   -0.26 |  -0.07 |         -0.14 |      -0.09 |           -0.21 |               -0.05 |                 -0.18
6       |     4 |       99.41 | -0.44 |            0.42 |             -0.20 |           0.14 |            0.44 |            0.18 |     -0.59 |      -0.50 |     -0.55 |             -0.13 |               -0.42 |      0.17 |   -0.12 |  -0.25 |          1.89 |       0.59 |            5.65 |               -0.12 |                 -0.18
7       |     4 |       17.65 | -0.44 |           -0.77 |              0.13 |          -0.19 |           -0.09 |           -0.06 |      0.37 |      -0.39 | -7.94e-03 |             -0.05 |               -0.42 |     -0.04 |   -0.43 |   5.65 |         -0.18 |      -0.05 |           -0.24 |               -0.12 |                 -0.18
8       |     3 |       20.47 |  1.58 |           -0.89 |              0.10 |          -0.51 |           -0.16 |           -0.34 |      0.52 |      -0.11 |     -0.07 |              0.27 |                0.16 | -8.46e-03 |    0.12 |  -0.25 |         -0.18 |      -0.31 |           -0.24 |               -0.12 |                  6.65
9       |     2 |       10.91 |  0.30 |           -0.69 |              0.21 |           0.11 |           -0.20 |           -0.39 |      0.73 |      -0.48 |     -0.23 |             -0.56 |                0.36 |     -0.47 |   -0.43 |  -0.25 |         -0.18 |      -0.31 |           -0.24 |                9.14 |                 -0.18

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3401.000          |            1325.018 |           2075.982 | 0.390

# You can access the predicted clusters via 'predict()'.
plot(rez_pamk)

5.10 Mixture

p_load(mclust)

rez_mixture <- cluster_analysis(data, method = "mixture")

rez_mixture
# Clustering Solution

The unique cluster accounted for 0.00% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares |    akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound |   maltese | rottweiler |  samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow |   lowchen |    saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |   180 |        3401 | 3.67e-17 |        7.76e-17 |         -6.48e-17 |      -3.38e-17 |       -5.59e-17 |       -4.32e-17 | -6.44e-17 |  -4.31e-17 | 2.14e-17 |          3.45e-17 |           -3.28e-17 | -9.67e-18 | -2.51e-17 | -1.17e-17 |     -7.91e-18 |  -8.96e-18 |        1.19e-18 |           -7.56e-18 |             -4.30e-18

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3401.000          |               0.000 |           3401.000 | 0.000

# You can access the predicted clusters via 'predict()'.
plot(rez_mixture)

5.11 Metaclustering

list_of_results <- list(rez_kmeans, rez_hclust, rez_hkmeans, rez_pam,
                        rez_hclust2, rez_dbscan, rez_hdbscan, rez_mixture)

probability_matrix <- cluster_meta(list_of_results)

heatmap(probability_matrix, scale = "none", 
        col = grDevices::hcl.colors(256, palette = "inferno"))

6 Computing Environment

 R version 4.1.2 (2021-11-01)
 Platform: x86_64-w64-mingw32/x64 (64-bit)
 Running under: Windows 10 x64 (build 18363)
 
 Matrix products: default
 
 attached base packages:
 [1] stats     graphics  grDevices utils     datasets  methods   base     
 
 other attached packages:
  [1] mclust_5.4.9      parameters_0.16.0 modelbased_0.9.0  see_0.6.8        
  [5] correlation_0.7.1 tmap_3.3-2        sf_1.0-5          DT_0.20          
  [9] sjPlot_2.8.10     sjmisc_2.8.9      scales_1.1.1      magrittr_2.0.1   
 [13] forcats_0.5.1     stringr_1.4.0     dplyr_1.0.7       purrr_0.3.4      
 [17] readr_2.1.1       tidyr_1.1.4       tibble_3.1.6      ggplot2_3.3.5    
 [21] tidyverse_1.3.1   pacman_0.5.1     
 
To cite R in publications use:

R Core Team (2021). R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing, Vienna, Austria. https://www.R-project.org/.

To cite the ggplot2 package in publications use:

Wickham H (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. ISBN 978-3-319-24277-4, https://ggplot2.tidyverse.org.